import csv
import pdb
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import pdb


# Load the tab-separated call table; every field is kept as text (dtype=str).
arr = np.genfromtxt("hg00733calls.txt", dtype=str, delimiter='\t')

# Column views used by the rest of the script (0-based column indices).
# The meaning of each column is taken from the variable names only --
# TODO confirm against the file's actual layout.
chrs, length, NQs, SQs, xhmmcalls, deccalls = (
    arr[:, col] for col in (2, 3, 9, 10, 15, 16)
)

# Reduce each entry of column 2 to the text before its first ':' with the
# first three characters dropped (presumably a "chr" prefix, e.g.
# "chr1:100-200" -> "1"; verify against the data).
chrs = np.asarray([region.partition(':')[0][3:] for region in chrs])

# A call counts as "corrected" when the two call columns disagree.
correctedids = xhmmcalls != deccalls
notcorrectedids = ~correctedids

# NOTE(review): despite the "SQs" prefix these are slices of the length
# column, not of SQs -- confirm which column was intended.
SQscorrected = length[correctedids]
SQsnotcorrected = length[notcorrectedids]

# Constant placeholder values, one per call in each group.
NQscorrected = [0] * len(SQscorrected)
NQsnotcorrected = [0.1] * len(SQsnotcorrected)



# Draw one column of star markers per chromosome: black for calls whose two
# call columns differ (corrected), yellow for calls where they agree.

# The table is loaded as text. Cast the lengths to numbers so matplotlib
# draws a continuous y axis instead of treating every distinct string as a
# category. Raises ValueError if the length column holds non-numeric text.
lengths_numeric = np.asarray(length, dtype=float)

# (mask over all calls, marker colour, legend text) for each plotted series.
# Corrected calls are drawn first, as before.
call_series = (
    (correctedids, 'black', 'DECoNT corrected true XHMM calls'),
    (notcorrectedids, 'yellow', 'True XHMM calls'),
)

# Legend texts already attached to a scatter. Each series is labelled only
# once, otherwise the legend repeats the entry for every chromosome.
labelled_series = set()

# The x position is the chromosome's index in np.unique(chrs), which is the
# same ordering the x tick labels are built from further down.
for xpos, chrom in enumerate(np.unique(chrs)):
    on_chrom = chrs == chrom

    for call_mask, colour, legend_text in call_series:
        # Restrict BOTH series to the current chromosome.
        selected = on_chrom & np.asarray(call_mask)
        n_calls = int(np.sum(selected))
        if n_calls == 0:
            continue

        label_kwargs = {}
        if legend_text not in labelled_series:
            label_kwargs['label'] = legend_text
            labelled_series.add(legend_text)

        plt.scatter(
            [xpos] * n_calls,
            lengths_numeric[selected],
            s=250,
            c=colour,
            marker="*",
            alpha=0.8,
            **label_kwargs
        )

# Axis decoration, legend and display of the figure.
plt.ylabel("CNV length in kilobase")

# One x tick per distinct chromosome label, at positions 0..n-1 in
# np.unique order.
chrom_labels = np.unique(chrs)
plt.xticks(range(len(chrom_labels)), chrom_labels)

# Five evenly spaced y ticks from 0 to 100, labelled with one decimal place.
# Explicit formatting is used because truncating str(value) to four
# characters rendered 100.0 as "100.".
ytick_values = np.linspace(0, 100, 5)
plt.yticks(ytick_values, ["{:.1f}".format(value) for value in ytick_values])

plt.legend(loc='upper left')

plt.show()